Affordable Housing Distribution vs Privately Owned Public Spaces (POPS)

Following this, the analysis focuses on the distribution of Privately Owned Public Spaces (POPS). These spaces contribute to urban livability and accessibility, particularly for communities with limited access to public amenities. Examining the distribution of POPS in relation to affordable housing and low-income populations allows for a better understanding of whether these shared resources are equitably distributed or concentrated in specific areas.

Code
import pandas as pd
import geopandas as gpd
import hvplot.pandas
import holoviews as hv
from shapely.geometry import Point

hv.extension('bokeh')  # select the Bokeh backend so hvplot output is interactive

# Load datasets:
# - affordable housing production records (has Latitude/Longitude columns; see Step 1)
# - POPS inventory (has latitude/longitude columns; see Step 2)
# - NYC census-tract polygons (GeoJSON) used as the aggregation geography
affordable_housing_data = pd.read_csv("./geographicdatascience_python_finalproject/database/Affordable_Housing_Production_by_Building_20241224.csv")
pops_data = pd.read_csv("./geographicdatascience_python_finalproject/database/Privately Owned Public Spaces_pops_24v2_csv/pops_24v2.csv")
geo_data = gpd.read_file("./geographicdatascience_python_finalproject/database/nyc_tracts.json")

# Step 1: Prepare Affordable Housing Data
# Drop rows lacking coordinates so we don't build invalid (NaN, NaN) points
# (mirrors the cleaning already applied to the POPS data in Step 2).
affordable_housing_data = affordable_housing_data.dropna(subset=["Longitude", "Latitude"])

# Build point geometries from the coordinate columns.
# gpd.points_from_xy is vectorized — much faster than a row-wise apply/lambda.
affordable_housing_gdf = gpd.GeoDataFrame(
    affordable_housing_data,
    geometry=gpd.points_from_xy(
        affordable_housing_data["Longitude"], affordable_housing_data["Latitude"]
    ),
    crs="EPSG:4326",  # raw coordinates are WGS84 lon/lat
)

# Reproject so the spatial join uses the same CRS as the tract polygons.
affordable_housing_gdf = affordable_housing_gdf.to_crs(geo_data.crs)

# Count affordable-housing buildings per census tract.
# predicate="within" (rather than "intersects") matches the POPS join below and
# avoids double-counting a point that sits exactly on a shared tract boundary
# ("intersects" would match such a point to every touching tract).
affordable_housing_with_tracts = gpd.sjoin(
    affordable_housing_gdf, geo_data, how="left", predicate="within"
)
housing_summary_by_tract = (
    affordable_housing_with_tracts.groupby("BoroCT2020")["Building ID"]
    .count()
    .reset_index()
    .rename(columns={"Building ID": "AffordableHousingCount"})
)

# Merge the per-tract counts onto the tract polygons; tracts with no
# affordable housing get 0 rather than NaN.
geo_data = geo_data.merge(housing_summary_by_tract, on="BoroCT2020", how="left")
geo_data["AffordableHousingCount"] = geo_data["AffordableHousingCount"].fillna(0)

# Step 2: Prepare POPS Data
# Coordinates may be stored as strings; coerce to numeric (invalid values -> NaN).
pops_data["longitude"] = pd.to_numeric(pops_data["longitude"], errors="coerce")
pops_data["latitude"] = pd.to_numeric(pops_data["latitude"], errors="coerce")

# Drop rows with missing or invalid coordinates.
pops_data = pops_data.dropna(subset=["longitude", "latitude"])

# Build point geometries from the coordinate columns.
# gpd.points_from_xy is vectorized — much faster than a row-wise apply/lambda.
pops_gdf = gpd.GeoDataFrame(
    pops_data,
    geometry=gpd.points_from_xy(pops_data["longitude"], pops_data["latitude"]),
    crs="EPSG:4326",  # raw coordinates are WGS84 lon/lat
)

# Reproject to match the tract polygons before the spatial join.
pops_gdf = pops_gdf.to_crs(geo_data.crs)

# Count POPS per census tract ("within" assigns each point to at most one tract).
pops_with_tracts = gpd.sjoin(pops_gdf, geo_data, how="left", predicate="within")
pops_summary = pops_with_tracts.groupby("BoroCT2020").size().reset_index(name="POPS_Count")

# Merge the per-tract counts onto the tract polygons; tracts with no POPS get 0.
geo_data = geo_data.merge(pops_summary, on="BoroCT2020", how="left")
geo_data["POPS_Count"] = geo_data["POPS_Count"].fillna(0)


# Choropleth maps per census tract: affordable housing (left) and POPS (right).
# Options shared by both panels so they render with a consistent look.
_shared_opts = dict(
    line_color="white",
    aspect="equal",
    colorbar=True,
)

# Affordable Housing Map — color scale capped at 50 so a handful of outlier
# tracts doesn't wash out the rest; capped tracts render dark red.
map_affordable_housing = geo_data.hvplot.polygons(
    "geometry",
    color="AffordableHousingCount",
    cmap="Reds",
    hover_cols=["BoroCT2020", "AffordableHousingCount"],
    title="Affordable Housing Distribution by Census Tract",
    clim=(0, 50),
    clipping_colors={"max": "darkred"},
    **_shared_opts,
)

# POPS Map
map_pops = geo_data.hvplot.polygons(
    "geometry",
    color="POPS_Count",
    cmap="Blues",
    hover_cols=["BoroCT2020", "POPS_Count"],
    title="POPS Distribution by Census Tract",
    **_shared_opts,
)

# Side-by-side layout for direct visual comparison of the two distributions.
combined_map = map_affordable_housing + map_pops
combined_map

Zooming in on the distribution of POPS shows that they are largely dispersed across areas with very little affordable housing, suggesting a possible negative correlation between the two. Next, we will test this through linear correlation analysis and cluster analysis.